---
title: "Where the Grass is Greener: Social Infrastructure and Resilience to COVID-19"
subtitle: "Replication Code"
author: "Timothy Fraser"
date: "May 7, 2021"
output: html_notebook
---

Much research has highlighted that community resources like social capital and soft policies that build social capital help communities prepare for and recover from disaster. We also know that social capital tends to be built through associations, like neighborhood organizations, parent-teacher associations, volunteer groups, unions, and more. And we know that these social capital tend to be built more in places with rich social infrastructure, like community centers, libraries, schools, public places, places of worship, etc. This idea was most clearly espoused by Eric Klinenberg in his 2018 book, Palaces for the People. 

https://www.thenatureofcities.com/2019/06/24/how-can-we-improve-social-infrastructure/


# 0. Packages

```{r, message = FALSE, warning = FALSE}
#dir.create("raw_data")

# Installation prep
#install.packages(c("tidyverse", "ggpubr", "remotes", "sf", "rgdal", "ggraph"))
#remotes::install_github("luukvdmeer/sfnetworks")
#remotes::install_github("hadley/dplyr")

# Data management
library(tidyverse)
library(ggpubr)

# GIS packages
library(sf)
library(rgdal)

# Plotting networks
library(sfnetworks)
library(dplyr) # newest version, so we can bind_rows() on sf objects
library(tidygraph)
library(ggraph)
```


## Japan polygons

One objective of this research is to map the network of movement throughout the US and Japan. While the US has its geocoded polygons available through the fantastic R package "tigris", Japan's municipal polygons are available online instead:

```{r}
dir.create("shapes")
# Download municipal boundaries
#download.file("http://www1.gsi.go.jp/geowww/globalmap-gsi/download/data/gm-japan/gm-jpn-all_u_2_2.zip", destfile = "shapes/japan.zip")
# Unzip them here
unzip("shapes/japan.zip", exdir = "shapes")
# Manually rename
file.rename(from = "shapes/gm-jpn-all_u_2_2", to = "shapes/japan")

```


```{r}
library(tidyverse)
library(sf)
library(rgdal)

# Get projection code for:
# Asia North Albers Equal Area Conic
#https://spatialreference.org/ref/esri/asia-north-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

# Get projection code for:
# WGS 84
# https://spatialreference.org/ref/epsg/wgs-84/proj4/
wgs <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

# Get municipal polygons
read_sf("shapes/japan/polbnda_jpn.shp") %>%
  st_as_sf() %>%
  st_transform(crs = aea) %>% 
  group_by(muni_code = adm_code,
           pref_code = str_sub(muni_code, 1,2)) %>%
  summarize(geometry = st_union(geometry)) %>%
  ungroup() %>%
  saveRDS("shapes/muni.rds")

# For some reason, these shapefiles leave out Tomiya and Takizaki towns in Myagi and Iwate
# we're going to bind them back in
extra <- dplyr::bind_rows(
  data.frame(x = 39.734694, 
             y = 141.077056,
             muni_code =  "04216"),
  data.frame(x = 38.3822749,
             y = 140.8287513,
             muni_code =  "03216")) %>%
  st_as_sf(crs = aea, coords = c("y", "x"))

# Get municipal centroids
read_rds("shapes/muni.rds") %>%
  st_transform(crs = aea) %>%
  group_by(muni_code) %>%
  summarize(geometry = st_centroid(geometry, of_largest_polygon = TRUE)) %>%
  ungroup() %>%
  saveRDS("shapes/muni_centroids.rds")

# Now bind together with the extra points and save
dplyr::bind_rows(read_rds("shapes/muni_centroids.rds"), extra) %>% 
  saveRDS("shapes/muni_centroids.rds")


# Now get prefectural polygons too
read_rds("shapes/muni.rds") %>%
  st_transform(crs = aea) %>%
  group_by(pref_code) %>%
  summarize(geometry = st_union(geometry)) %>%
  ungroup() %>%
  saveRDS("shapes/pref.rds")

# Get prefectural centroids
read_rds("shapes/pref.rds") %>%
  st_transform(crs = aea) %>%
  group_by(pref_code) %>%
  summarize(geometry = st_centroid(geometry, of_largest_polygon = TRUE)) %>%
  ungroup() %>%
  saveRDS("shapes/pref_centroids.rds")

# Read in both kinds of centroids
dplyr::bind_rows(
  # Including municipality centroids
  read_rds("shapes/muni_centroids.rds") %>%
    rename(code = muni_code) %>%
    mutate(type = "Municipality"),
  # And prefecture centroids
  read_rds("shapes/pref_centroids.rds") %>%
    rename(code = pref_code) %>%
    mutate(type = "Prefecture")) %>%
  saveRDS("shapes/centroids.rds")

remove(extra)
```

# 1. Formatting

https://nlftp.mlit.go.jp/ksj/index.html

## 1.1 Cultural Facilities

https://nlftp.mlit.go.jp/ksj/index.html

```{r}
# Get projections
aea <- "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
wgs <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

read_sf("shapes/cultural_facilities/P27-13.shp") %>%
  st_as_sf() %>%
  st_transform(crs = aea) %>%
  select(muni_code = "P27_001", 
         name = "P27_005", 
         public_type = "P27_002", 
         public_subtype = "P27_003",  
         type = "P27_004", 
         address = "P27_006",    
         floors = "P27_008", 
         year_built = "P27_009",
         geometry) %>%
  # recode
  mutate(public_type = public_type %>% dplyr::recode(
    "3" = "building", #  "建物",
    "9" =	"other",# "その他",
    "11" = "national institution", # "国の機関",
    "12"	= "local public org", #"地方公共団体",
    "13" = "welfare institution", # "厚生機関",
    "14"	= "policy department", # "警察機関",
    "15" =  "fire department", # "消防署",
    "16"	= "school", #  "学校",
    "17"	= "hospital", # "病院",
    "18"	= "post office", # "郵便局",
    "19" = "welfare centers" # "福祉施設"))
  )) %>%
  mutate(public_subtype = public_subtype %>% dplyr::recode(
    "03001" = "art gallery", # "美術館", 
    "03002" = "museum ,archive, memorial, science museum", # 資料館，記念館，博物館，科学館"
    "03003" = "library", #	"図書館" 
    "03004" = "aquarium", #	"水族館"
    "03005"	= "zoo/botanical garden", # "動植物園", 
    "09001" = "public enterprise/government org",	#公共企業体・政府関係機関
    "09002" = "independent agency/inter-university research institute", #"独立行政法人・大学共同利用機関法人"
    "11100" = "parliament building", # "国会"
    "11101" = "audit board", #"会計検査院"
    "11102" = "personnel authority", # "人事院"
    "11103" = "cabinet legislation bureau", # "内閣法制局",
    "11110" = "cabinet bureau", #	内閣府
    "11111" = "cabinet secretariat",#	内閣官房
    "11112" = "imperial household agency", #宮内庁
    "11113" = "financial services agency", #金融庁"
    "11114" =  "fair trade commission", #公正取引委員会"
    "11120" = "national public security commission", #	国家公安委員会
    "11121" = "national police agency",	#警察庁
    "11130" = "ministry of defense",#	防衛庁
    "11131"	= "defence facilities agency", #防衛施設庁
    "11140" = "ministry of internal affairs and communications", #総務省
    "11142" = "Fire and Disaster Management Agency", #消防庁"
    "11144"	 = "Environmental Dispute Coordination Committee", #公害等調整委員会"
    "11150" = "ministry of justice",#	法務省
    "11151" = "public prosecutors office", #検察庁
    "11152" = "public security intelligence agency",# "公安調査庁"
    "11153" = "public security examination committee", #	公安審査委員会
    "11160" = "ministry of foreign affairs", #外務省"
    "11161"	= "foreign embassy",# "外国公館",
    "11170" =	"ministry of finance",# "財務省"
    "11171" = "national tax agency", # "国税庁"
    "11180" = "ministry of education", #	文部科学省
    "11181" = "minstry of cultural affairs",#	文化庁
    "11190" = "Ministry of Health, Labor and Welfare", #	厚生労働省
    "11191" = "social insurance agency",# "社会保険庁"
    "11192" = "central labor relations commission",# "中央労働委員会"
    "11200" = "Ministry of Agriculture, Forestry and Fisheries",#	農林水産省
    "11202"	= "Forestry Agency", #"林野庁"
    "11203" = "Fisheries Agency", #水産庁"
    "11210" = "Ministry of Economy, Trade and Industry", #	経済産業省,
    "11211" = "Agency of Natural Resources and Energy",#	資源エネルギー庁
    "11212" = "Patent Office",# "特許庁"
    "11213" = "Small and Medium Business Administration", #中小企業庁"
    "11220" = "Ministry of Land, Infrastructure, Transport and Tourism", #国土交通省"
    "11221" =	"Japan Coast Guard",# "海上保安庁"
    "11222" =	"Japan Marine Accident Inquiry Agency", #海難審判庁"
    "11223" = "Japan Meteorological Agency",#	気象庁
    "11224" = "Sailor Labor Committee", #	船員労働委員会
    "11230" = "Ministry of Environment", #	環境省
    "11240" = "court",#	裁判所
    "12001" = "prefectural office", #	都道府県庁
    "12002" = "city hall", #	区役所（東京都），市役所
    "12003" = "ward office (designated cities)",#	区役所（政令指定都市）
    "12004"	= "town hall", #町村役場"
    "12005" =	"local prefectural agency", #都道府県の出先機関"
    "13001" = "health center",# "保健所"
    "16001" = "elementary school", #	小学校
    "16002" = "middle school",#	中学校
    "16003"	= "secondary school", #中等教育学校"
    "16004" = "senior high school", #高等学校
    "16005" = "technical school", #高等専門学校
    "16006" = "junior college", #短期大学"
    "16007" = "university", #	大学
    "16008" = "school for the blind",#	盲学校
    "16009" = "school for the deaf",#	ろう学校
    "16010" = "school for the disabled", #養護学校"
    "18001" =	"post office", #普通郵便局"
    "18002"	= "special post office (collection and delivery)", #特定郵便局（集配局）
    "18003" =	"special post office (no collector or devivery)", #特定郵便局（無集配局）
    "18004" = "simple post office", #	簡易郵便局
    "18005" = "local neighborhood office" #地域区分局"
  )) %>%
  mutate(type = type %>% dplyr::recode(
    "03001" = "art gallery", #	"美術館"
    "03002" = "museum ,archive, memorial, science museum", # 資料館，記念館，博物館，科学館"
    "03003" = "library", #図書館
    "03004" = "aquarium", #	"水族館"
    "03005"	= "zoo/botanical garden", # "動植物園", 
    "03101" =	"stadium", #"陸上競技場"
    "03102" =	"baseball/softball field", #野球場・ソフトボール場"
    "03103" =	"ballpark", #球技場"
    "03104" = "multipurpose playground",	#多目的運動場
    "03105"= 	"swimming pool (indoor)", #"水泳プール（屋内）"
    "03106" ="swimming pool (outdoor)",#	水泳プール（屋外）
    "03107" = "leisure pool", #"レジャープール"
    "03108" = "diving pool", #	ダイビングプール
    "03109"	= "gymnasium", #"体育館"
    "03110" =	"judo hall", #柔道場"
    "03111" =	"kendo hall", #剣道場", 
    "03112"	= "martial arts hall", #柔剣道場（武道場）"
    "03113" = "karate/aikido hall",	#空手・合気道場
    "03114" =	"outdoor volleyball court", #バレーボール場（屋外）"
    "03115" =	"tennis court (outdoor)", #庭球場（屋外）
    "03116" = "tennis court (indoor)", #庭球場（屋内）"
    "03117" = "basketball court (outdoor)", #	"バスケットボール場（屋外）"
    "03118" =	"sumo hall (outdoor)", #相撲場（屋外）
    "03119" = "sumo hall (indoor)", #相撲場（屋内）
    "03120" =	"ping pong hall", #卓球場"
    "03121" = "kyudo hall", #弓道場"
    "03122" =	"archery range", #アーチェリー場"
    "03123" =	"equestrian ground", #馬場"
    "03124" =	"ice skating rink (indoor)", #アイススケート場（屋内）"
    "03125" = "ice skating rink (outdoor)",	#アイススケート場（屋外）
    "03126"	= "roller skating rink (outdoor)",#ﾛｰﾗｰｽｹｰﾄ･ｲﾝﾗｲﾝｽｹｰﾄ場（屋内）
    "03127"	= "roller skating rink (indoor)",# ﾛｰﾗｰｽｹｰﾄ･ｲﾝﾗｲﾝｽｹｰﾄ場（屋外）
    "03128" = "mountain houses/forest schools", #山の家・林間学校等の施設"
    "03129" =	"training room", #トレーニング場"
    "03130"	= "classroom", #レスリング場"
    "03131" =	"boxing hall", #ボクシング場
    "03132" = "dance studio",	#ダンス場
    "03133"	= "shooting range", #射撃場
    "03134" = "golf course",	#ゴルフ場
    "03135" = "gold driving range", #	ゴルフ練習場
    "03136" = "bowling alley",#	ボウリング場
    "03137" = "rowing course",#	漕艇場
    "03138" =	"gateball/croquet field", #ゲートボール・クロッケー場"
    "03139" =	"squash/racquetball field", #スカッシュ・ラケットボール場"
    "03140"	= "yacht marina", #"ヨット場"
    "03141" =	"ski/snowboarding course", #スキー・スノーボード場"
    "03142" =	"camp site", #キャンプ場"
    "03143" =	"hiking course", #ハイキンギコース"
    "03144"	= "cycling course", #サイクリングコース"
    "03145"	= "orienteering course", #オリエンテーリングコース"
    "03146" =	"track", #ランニングコース"
    "03147" =	"adventure play course", #冒険遊具コース"
    "03148"	= "seaside recreation facilities/beaches", #海の家・海水浴場等の施設"
    "03149" =	"swimming pools (rivers/lakes)", #河川・湖沼等の遊泳場"
    "03150" = "sky sports facility"#	スカイスポーツ施設
  )) %>%
  saveRDS("shapes/cultural_facilities.rds")
```


## 1.2 City Offices and Public Meeting Spaces

```{r, message = FALSE, warning = FALSE}

#https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P05.html
# Download each from link above, then group upzip them using code below
dir("shapes/city_hall_and_meeting_spaces/source_files", full.names = TRUE) %>%
  lapply(function(x) unzip(x, exdir = "shapes/city_hall_and_meeting_spaces", overwrite = TRUE))

data.frame(file = dir("shapes/city_hall_and_meeting_spaces", full.names = TRUE)) %>%
  filter(str_detect(file, ".shp")) %>%
  split(.$file) %>%
  map(~read_sf(.$file, options = "ENCODING=SHIFT-JIS") %>%
        st_as_sf() %>%
        st_set_crs(value = wgs) %>%
        st_transform(crs = aea)) %>%
  dplyr::bind_rows() %>%
  select(muni_code = P05_001,
         type = P05_002,
         name = P05_003,
         address = P05_004,
         geometry) %>%
  mutate(type = type %>% dplyr::recode(
    "1" = "city hall main branch", # "本庁（市役所、区役所、町役場、村役場）",
    "2" = "branch office", # "支所、出張所、連絡所",
    "3" = "public center (machizukuri)", #"上記以外の行政サービス施設",
    "4" = "community center", # "公立公民館",
    "5" = "public meeting area" # "集会施設"
  )) %>%
  saveRDS("shapes/city_hall_and_meeting_spaces.rds")
```

## 1.3 Schools

```{r}
# Download schools dataset here:
#https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P29.html#prefecture00

read_sf("shapes/schools/P29-13.shp") %>%
  st_as_sf() %>%
  st_transform(crs = aea) %>%
  select(muni_code = P29_001, 
         subtype = P29_003,
         school_type = P29_004,
         name = P29_005,
         address = P29_006,
         level = P29_007,
         geometry) %>%
  mutate(level = level %>% dplyr::recode(
    "1" = "national", #	国
    "2" = "prefectural", #	都道府県
    "3" = "municipal", #	市区町村
    "4" = "private", #	民間
    "0" = "other"#	その他
  )) %>%
  mutate(school_type = school_type %>% dplyr::recode(
    "16001" =  "elementary", # "小学校",
    "16002" =  "middle", #	"中学校",
    "16003" =	"secondary",# "中等教育学校",
    "16004" =	"high school",# "高等学校",
    "16005" =	"technical school", # "高等専門学校",
    "16006" =	"junior college",# "短期大学",
    "16007" =	"university", # "大学",
    "16012" =	"special support school")) %>%
  saveRDS("shapes/schools.rds")
```

## 1.4 City Parks


```{r}
#https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P13.html
# Download each from link above, then group upzip them using code below
#dir("shapes/city_parks/source_files", full.names = TRUE) %>%
#  lapply(function(x) unzip(x, exdir = "shapes/city_parks", overwrite = TRUE))

data.frame(file = dir("shapes/city_parks", full.names = TRUE)) %>%
  filter(str_detect(file, ".shp")) %>%
  split(.$file) %>%
  map(~read_sf(.$file, options = "ENCODING=SHIFT-JIS") %>%
        st_as_sf() %>%
        st_set_crs(value = wgs) %>%
        st_transform(crs = aea)) %>%
  dplyr::bind_rows() %>%
  select("name" = "P13_003",
         "type" ="P13_004", 
         "muni" = "P13_006",
         "pref" = "P13_005",
         "manage_muni" = P13_002,
         "manage_pref" = "P13_001",
         "year_start" = "P13_007", 
         "area_m2" = "P13_008",
         "city_planning_decision" = "P13_009",
         notes = "P13_010", geometry) %>%
  mutate(type = type %>% dplyr::recode(
    "1" = "block park", # "街区公園",
    "2" =	"neighborhood park", #近隣公園"
    "3"	= "district park", #地区公園（カントリーパーク）"
    "4"	= "general park", #総合公園"
    "5" = "sports park", #"運動公園"
    "6" =	"wide area park", #広域公園"
    "7" = "recreation center",#	レクリエーション都市
    "8" =	"national park", #国営公園"
    "9"	= "special parks (scenic parks, flora and fauna parks, historical parks, graveyards)", #特殊公園（風致公園、動植物公園、歴史公園、墓園）"
    "10"	= "buffer green space", #緩衝緑地"
    "11" = "urban green space", #	都市緑地
    "12" = "green road", #緑道"
    "13"	= "municipal forest", #都市林"
    "14" =	"square park" #広場公園"
  )) %>%
  saveRDS("shapes/city_parks.rds")

ggplot() +
  geom_sf(data = read_rds("shapes/pref.rds"), alpha = 0.5, color = "grey") +
  geom_sf(data = read_rds("shapes/city_parks.rds") %>% 
  sample_n(size = 5) ) +
  theme_void()
```

## 1.5 Evacuation Shelters

```{r}
# https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P20.html
# Download evacuation shelters from here
dir("shapes/evacuation_shelters/source_files", full.names = TRUE) %>%
  lapply(function(x) unzip(x, exdir = "shapes/evacuation_shelters", overwrite = TRUE))

data.frame(file = dir("shapes/evacuation_shelters", full.names = TRUE)) %>%
  filter(str_detect(file, ".shp")) %>%
  split(.$file) %>%
  map(~read_sf(.$file) %>%
        st_as_sf() %>%
        st_set_crs(value = wgs) %>%
        st_transform(crs = aea)) %>%
  dplyr::bind_rows() %>%
  select(
    muni_code = P20_001, 
    name = P20_002, 
    address = P20_003,
    type = P20_004, 
    capacity = P20_005,
    scale_m2 = P20_006, 
    quake = P20_007, 
    tsunami = P20_009,
    volcano = P20_010, 
    other_crisis = P20_011, 
    crisis_not_specified = P20_012, 
    geometry) %>%
  saveRDS("shapes/evacuation_shelters.rds")
```

## 1.6 Densely Inhabited Districts

https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-A16-v2_3.html#prefecture00

```{r}

read_sf("shapes/did/A16-15_00_DID.shp") %>%
  st_as_sf() %>%
  st_set_crs(value = wgs) %>%
  st_transform(crs = aea) %>%
  select(did_id = 1,
         muni_code = 2, 
         muni_name = 3, 
         two_did_same_city = 4,
         pop = 5,
         area_km2 = 6,
         pop_prev = 7,
         area_km2_prev = 8,
         pop_ratio = 9,
         area_ratio = 10,
         date = 11,
         geometry) %>%
  saveRDS("shapes/did.rds")

```

## 1.7 Prefectural Land Price Survey

```{r}
#Download nationwide data from year Reiwa 2
#https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-L02-v2_7.html#prefecture00

read_sf("shapes/land_price/L02-20.shp") %>% 
  st_as_sf() %>%
  st_transform(crs = aea) %>%
  # Remove entries of forested properties, 
  # as these are written in different units (yen per 10 acres)
  filter(!L02_048 %in% c("地森計", "地森計・保安林")) %>%
  select(year = L02_005,
         survey_price_yenm2 = L02_006,
         muni_code = L02_021,
         neighborhood = L02_022,
         address = L02_023,
         area_m2 = L02_024,
         current_usage = L02_025,
         # Previous year's land price in yen per m2,
         # starting in Showa 58
         L02_054:L02_091,
         geometry) %>%
  saveRDS("shapes/land_price.rds")

```

## 1.8 Density (estimated)

```{r, eval = FALSE}
# Extract files en masse from their many prefectural zipfiles
#dir("shapes/pop/source_files", full.names = TRUE) %>%
#  lapply(function(x) unzip(x, exdir = "shapes/pop", overwrite = TRUE))

aea <- "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

data.frame(file = dir("shapes/pop", full.names = TRUE)) %>%
  filter(str_detect(file, ".shp")) %>%
  split(.$file) %>%
  map(~read_sf(.$file) %>%
        st_as_sf() %>%
        st_transform(crs = aea) %>%
        select(cell_id = MESH_ID,
               muni_code = SHICODE,
               pop_2020 = PTN_2020,
               #pop_men_2020 = PMN_2020,
               #pop_women_2020 = PFN_2020,
               rate_0_14_2020 = RTA_2020,
               rate_15_64_2020 = RTB_2020,
               rate_65_plus_2020 = RTC_2020,
               geometry)) %>%
  dplyr::bind_rows() %>%
    mutate(muni_code = str_pad(muni_code, width = 5, side = "left", pad = "0"))  %>%
  saveRDS("shapes/pop.rds")


# Codebook here
#https://nlftp.mlit.go.jp/ksj/gml/datalist/mesh500_1000_h30_datalist.pdf

# This data esimates for every five years after 2015
# the population, with age-breakdown, for every 1000 square meter block in the country. 


pop <- read_rds("shapes/pop.rds") %>%
  filter(str_sub(muni_code, 1,2) == "40")

pref <- read_rds("shapes/pref.rds") %>% 
            filter(pref_code == "40")
muni <- read_rds("shapes/muni.rds")

ggplot() +
  geom_sf(data = muni, color = "darkgrey", size = 5) +
  geom_sf(data = pop, mapping = aes(fill = pop_2020), color = NA) +
  viridis::scale_fill_viridis(option = "plasma") +
  guides(fill = guide_colorsteps(barwidth = 0.5, barheight = 10)) +
  theme_void()

remove(pop, muni, pref)
```

## 1.9 Population

```{r}
aea <- "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

#https://www.e-stat.go.jp/gis/statmap-search?page=1&type=2&aggregateUnitForBoundary=A&toukeiCode=00200521&toukeiYear=2015&serveyId=A002005212015&prefCode=40&coordsys=1&format=shape

# Let's import and adjust the neighborhoods for fukuoka
read_sf("shapes/poly/h27ka40.shp") %>%
  st_as_sf() %>%
  st_transform(crs = aea)  %>%
  filter(GST_NAME == "福岡市") %>%
  # Zoom into just neighborhoods, excluding wards, which are duplicative
  # Filter out water
  filter(S_AREA != "999000") %>%
  # Remove that one island way off in the left hand corner that skews the map
  filter(X_CODE != min(X_CODE)) %>%
  rename(pop = JINKO,
         households = SETAI,
         area_m2 = AREA) %>%
  saveRDS("shapes/neighborhoods.rds")

# Identify ward boundaries
read_rds("shapes/neighborhoods.rds") %>% 
  mutate(ward_code = str_sub(KEY_CODE, 1,5)) %>%
  group_by(ward_code, ward = CITY_NAME) %>%
  summarize(geometry = st_union(geometry),
            pop = sum(pop, na.rm = TRUE),
            households = sum(households, na.rm = TRUE),
            area_m2 = sum(area_m2, na.rm = TRUE)) %>%
  ungroup() %>%
  saveRDS("shapes/wards.rds")
```

## 1.10 Demographics

```{r, message = FALSE, warning = FALSE}
# Next, download neighborhood demographic data
# https://www.e-stat.go.jp/gis/statmap-search?page=1&type=1&toukeiCode=00200521

# Download data for Fukuoka, and grab total population, men, women, and ages 65+ in 2015
read_delim("shapes/poly/age_gender.txt", delim = ",", skip = 1,
           locale = locale(encoding = "SHIFT-JIS")) %>%
  select(code = 1, level = 2, city = 3, neighborhood = 4,
         pop = `総数、年齢「不詳」含む`,
         pop_men = `男の総数、年齢「不詳」含む`,
         pop_women = `女の総数、年齢「不詳」含む`,
         pop_15_64 = `　総数１５〜６４歳`,
         pop_65_plus = `　総数６５歳以上`) %>%
  filter(str_detect(city, "福岡市")) %>%
  # Let's update that with a zero so it's computable
  mutate_at(vars(pop:pop_65_plus), funs(parse_number(.))) %>%
  mutate_at(vars(pop:pop_65_plus), funs(if_else(is.na(.), 0, as.numeric(.)))) %>%
  mutate(code = as.character(code)) %>% 
  saveRDS("shapes/age_gender.rds")
```

## 1.11 Employment

```{r, message = FALSE, warning = FALSE}
read_delim("shapes/poly/employees_by_industry.txt", delim = ",", skip = 1,
           locale = locale(encoding = "SHIFT-JIS")) %>%
  select(code = 1, level = 2, city = 3, neighborhood = 4,
         total = "総数",
         farming_forestry = "Ａ　農業、林業", self_farming = "　うち農業",
         fishing = "Ｂ　漁業", 
         mining = "Ｃ　鉱業、採石業、砂利採取業",
         construction = "Ｄ　建設業",
         manufacturing = "Ｅ　製造業",
         electric_gas_water = "Ｆ　電気・ガス・熱供給・水道業",
         info_communication = "Ｇ　情報通信業",
         transport_postal = "Ｈ　運輸業、郵便業",
         wholesale_retail = "Ｉ　卸売業、小売業",
         finance = "Ｊ　金融業、保険業",
         real_estate_leasing = "Ｋ　不動産業、物品賃貸業",
         research_tech = "Ｌ　学術研究、専門・技術サービス業",
         hospitality_restaraunts = "Ｍ　宿泊業、飲食サービス業",
         lifestyle_entertainment = "Ｎ　生活関連サービス業、娯楽業",
         education = "Ｏ　教育、学習支援業",
         medicine_welfare = "Ｐ　医療、福祉",
         complex_service = "Ｑ　複合サービス事業",
         other_services = "Ｒ　サービス業（他に分類されないもの）",
         public_affairs = "Ｓ　公務（他に分類されるものを除く）",
         unclassified = "Ｔ　分類不能の産業",
         employers = "雇用者（役員を含む）",
         self_employed = "自営業主（家庭内職者を含む）",
         family_employee = "家族従業者")  %>%
  filter(str_detect(city, "福岡市")) %>%
  # Sometimes, they mark 0 with a "-", as in, we didn't find anyone
  # Let's update that with a zero so it's computable
  mutate_at(vars(total:family_employee), funs(parse_number(.))) %>%
  mutate_at(vars(total:family_employee), funs(if_else(is.na(.), 0, as.numeric(.)))) %>%
  # Finally, let's create some simple indicators of employement, meant to help delineate socioeconomic status
  mutate(
    high_paying = (finance + real_estate_leasing + research_tech + medicine_welfare) / total,
    tertiary_sector = (info_communication + transport_postal + wholesale_retail +
                         finance + real_estate_leasing + 
                         research_tech + hospitality_restaraunts +
                         lifestyle_entertainment + education + 
                         medicine_welfare + complex_service + other_services + public_affairs) / total,
    secondary_sector = (construction + manufacturing + electric_gas_water) / total,
    primary_sector = (farming_forestry + fishing + mining) / total) %>%
  mutate(code = as.character(code)) %>%
  saveRDS("shapes/employees_by_industry.rds")
```



# 2. Interpolation

```{r}
library(tidyverse)
library(sf)
library(rgdal)
library(dplyr)
library(gstat)


read_rds("shapes/neighborhoods.rds") %>%
  mutate(pop_density = pop / (area_m2 / 1000000),
         household_density = households / (area_m2 / 1000000)) %>%
  select(code = KEY_CODE, pop_density, household_density, geometry) %>%
  left_join(by = "code", 
            y = read_rds("shapes/age_gender.rds"))  %>% 
  # Calculate % by gender/age
  mutate(pop_men = pop_men / pop * 100,
         pop_women = pop_women / pop * 100,
         pop_65_plus = pop_65_plus / pop * 100) %>%
  # Join in employment data
  left_join(by = "code", 
            y = read_rds("shapes/employees_by_industry.rds") %>%
              select(code, total, high_paying:primary_sector)) %>%
  mutate_at(vars(high_paying, tertiary_sector, 
                 secondary_sector, primary_sector), funs(. * 100)) %>%  
  # Calculate percentage of working age adults who are employed
  mutate(employment_rate = total / pop_15_64 * 100) %>%
  saveRDS("shapes/neighborhoods_data.rds")

shapes <- read_rds("shapes/neighborhoods_data.rds")

# The shapefile is measured in meters,
# so when we make the fishnet cellsize 500,
# that means 500 m (0.5 km)!
shapes %>%
  # Make one giant polygon
  summarize(geometry = st_union(geometry)) %>%
  st_make_grid(cellsize = 500, what = "polygons") %>%
  st_as_sf() %>%
  select(geometry = x) %>%
  st_join(shapes) %>%
  filter(!is.na(code)) %>%
  select(geometry) %>%
  # Zoom into just distinct grid-cells,
  # because we tend to make duplicates due to overlaps
  distinct() %>%
  mutate(id = 1:n()) %>%
  select(id, geometry) %>%
  saveRDS("shapes/fishnet.rds")

fish <- read_rds("shapes/fishnet.rds")

ggplot() +
  geom_sf(data = fish, color = "steelblue", fill = "black")  +
  geom_sf(data = shapes, color = "white", fill = NA, size = 0.25) +
  theme_void()
```

## COVID Data

```{r, message = FALSE, warning = FALSE}
#Patient ward data since April 2020
#https://ckan.open-governmentdata.org/dataset/401000_pref_fukuoka_covid19_patients/resource/44c15434-6082-4b61-bad5-d7e98bbaeef1

dir("shapes/covid", full.names = TRUE)[1] %>%
  read_csv() %>%
  select(
    id = `No`,
    date = `公表_年月日`,
    ward = `居住地`,
    age = `年代`,
    gender = `性別`,
    infection_route_unknown = `感染経路不明`,
    close_contact = `濃厚接触者`,
    traveled_abroad = `海外渡航歴有`) %>%
  filter(str_detect(ward, "福岡市")) %>%
  filter(ward != "福岡市") %>%
  mutate(ward = str_remove(ward, "福岡市")) %>%
  # This dataset include 9,055 patients in Fukuoka City
  # 309 of them were not geoloated to a specific ward - we exclude them from this analysis
  # Let's code these by month
  mutate(month = date %>% lubridate::floor_date(unit = "months", week_start = 1)) %>%
  saveRDS("shapes/covid.rds")


# Import fishnet grid
fish <- read_rds("shapes/fishnet.rds")

# Get suicides per 100,000 people
wards <- read_rds("shapes/wards.rds") %>%
  left_join(by = c("ward" = "ward"),
            y = read_rds("shapes/covid.rds") %>%
              group_by(ward, month) %>%
              summarize(cases = n(),
                        cases_10_under = sum(age == "10歳未満"),
                        cases_20 = sum(age ==  "20代"),
                        cases_30 = sum(age == "30代"),
                        cases_40 = sum(age == "40代"),
                        cases_50 = sum(age == "50代"),
                        cases_60 = sum(age == "60代"),
                        cases_70 = sum(age == "70代"),
                        cases_80 = sum(age == "80代"),
                        cases_90_plus = sum(age == "90代以上"),
                        cases_men = sum(gender == "男性"),
                        cases_women = sum(gender == "女性")) %>%
              ungroup()) %>%
  # Calculate covid outcome rates based on our calculated population from 2015 census
  mutate_at(vars(cases:cases_women), funs(. / pop * 100000))

wards %>%
  mutate(ward = ward %>% dplyr::recode(
    "東区" = "East",
    "西区" = "West",
    "博多区" = "Hakata",
    "南区" = "South",
    "中央区" = "Central",
    "城南区" = "Jonan",
    "早良区" = "Sawara")) %>%
  ggplot(mapping = aes(x = month, y = cases, color = reorder(ward, -pop))) +
  geom_line() +
  geom_point(size = 2) +
  theme_minimal(base_size = 14) +
  labs(x = "Month", y = "COVID-19 Case Rates\nper 100,000 residents",
       color = "Fukuoka City\nWards\n(sorted\nby population)") +
  scale_color_manual(values = c("#6CED4B", "#648FFF", "#785EF0", "#B02986", "#DC267F", "#FE6100","#FFB000")) +
  #scale_y_log10() +
  ggsave("viz/covid_rates_by_ward_line.png", dpi = 500, width = 8, height = 4)



read_rds("shapes/wards.rds") %>%
  left_join(by = c("ward" = "ward"),
            y = read_rds("shapes/covid.rds") %>%
              group_by(ward, month) %>%
              summarize(cases = n(),
                        cases_10_under = sum(age == "10歳未満"),
                        cases_20 = sum(age ==  "20代"),
                        cases_30 = sum(age == "30代"),
                        cases_40 = sum(age == "40代"),
                        cases_50 = sum(age == "50代"),
                        cases_60 = sum(age == "60代"),
                        cases_70 = sum(age == "70代"),
                        cases_80 = sum(age == "80代"),
                        cases_90_plus = sum(age == "90代以上"),
                        cases_men = sum(gender == "男性"),
                        cases_women = sum(gender == "女性")) %>%
              ungroup()) %>%
  group_by(month) %>%
  mutate(total = sum(cases,na.rm = TRUE)) %>%
  mutate(ward = ward %>% dplyr::recode(
    "東区" = "East",
    "西区" = "West",
    "博多区" = "Hakata",
    "南区" = "South",
    "中央区" = "Central",
    "城南区" = "Jonan",
    "早良区" = "Sawara")) %>%
  ggplot(mapping = aes(x = month, y = cases / pop, fill = reorder(ward, -pop))) +
  geom_col(position = "fill", color = "white") +
 # geom_point(size = 2) +
  theme_classic(base_size = 14) +
  scale_y_continuous(breaks = c(0,.25, .5, .75, 1), labels = c(0, 25, 50, 75, 100)) +
  theme(panel.border = element_rect(fill = NA, color = "black")) +
  labs(x = "Month", y = "(%) Share of COVID-19 Cases per capita",
       fill = "Fukuoka City\nWards\n(sorted\nby population)") +
  scale_fill_manual(values = c("#6CED4B", "#648FFF", "#785EF0", "#B02986", "#DC267F", "#FE6100","#FFB000")) +
  ggsave("viz/covid_rates_percentage.png", dpi = 500, width = 7, height = 5)
```


```{r}
get_covid_rates = function(mymonth){
  gstat::gstat(id = "id", formula = cases ~ 1, 
                      data = wards %>%
                        filter(month == mymonth) %>%
                        na.omit() %>%
                        as("Spatial"),
                      set = list(idp = 2)) %>%
          predict(fish) %>%
          dplyr::select(predicted = id.pred, geometry) %>%
          # Add in a unique id from the original grid cells
          mutate(id = fish$id) %>%
          mutate(month = mymonth) %>%
    return()
}


# Get COVID Rates per week
# Takes a while
system.time(
  data.frame(month = wards$month %>% unique() %>% sort()) %>%
    split(.$month) %>%
    # Make a grid of shapes from the polygon
    map(~get_covid_rates(.$month)) %>%
    dplyr::bind_rows() %>%
    saveRDS("shapes/interpolate/case_rate.rds")
)





get_covid_rates_women = function(mymonth){
  gstat::gstat(id = "id", formula = cases_women ~ 1, 
                      data = wards %>%
                        filter(month == mymonth) %>%
                        na.omit() %>%
                        as("Spatial"),
                      set = list(idp = 2)) %>%
          predict(fish) %>%
          dplyr::select(predicted = id.pred, geometry) %>%
          # Add in a unique id from the original grid cells
          mutate(id = fish$id) %>%
          mutate(month = mymonth) %>%
    return()
}


# Get COVID Rates per week
# Takes a while
system.time(
  data.frame(month = wards$month %>% unique() %>% sort()) %>%
    split(.$month) %>%
    # Make a grid of shapes from the polygon
    map(~get_covid_rates_women(.$month)) %>%
    dplyr::bind_rows() %>%
    saveRDS("shapes/interpolate/case_rate_women.rds")
)



rm(list = ls())
```

## Suicide Data

Let's also grab some suicide data, collected each month for the last year.

```{r}
#https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/0000197204_00006.html
data.frame(file = dir("shapes/suicide", full.names = TRUE)) %>%
  split(.$file) %>%
  map_dfr(~readxl::read_excel(.$file, skip = 3) %>%
            slice(-c(1:3)) %>%
            select(muni_code = `市区町村コード`,
                   #muni = `市区町村名`,
                   ward = `政令指定都市詳細`,
                   suicides = `自殺者数`,
                   suicide_rate = `自殺死亡率`,
                   suicide_rate_annualized = `年換算した自殺死亡率`) %>%
            filter(str_detect(ward, "福岡市")), .id = "month") %>%
  mutate(month = str_extract(month, "[0-9]{4}-[0-9]+") %>% lubridate::ym()) %>%
  saveRDS("shapes/suicides.rds")

# Import fishnet grid
fish <- read_rds("shapes/fishnet.rds")

# Get suicides per 100,000 people
wards <- read_rds("shapes/wards.rds") %>%
  left_join(by = c("ward_code" = "muni_code"), y = read_rds("shapes/suicides.rds") %>%
              mutate(muni_code = str_sub(muni_code, 1,5)) %>%
              select(-ward)) %>%
  # Calculate suicide rate based on our calculated population from 2015 census
  mutate(suicide_rate = suicides / pop * 100000)


get_suicide_rates = function(mymonth){
  gstat::gstat(id = "id", formula = suicide_rate ~ 1, 
                      data = wards %>%
                        filter(month == mymonth) %>%
                        na.omit() %>%
                        as("Spatial"),
                      set = list(idp = 2)) %>%
          predict(fish) %>%
          dplyr::select(predicted = id.pred, geometry) %>%
          # Add in a unique id from the original grid cells
          mutate(id = fish$id) %>%
          mutate(month = mymonth) %>%
    return()
}

# Get Suicide Rate
# Takes about ~20 seconds
system.time(
  data.frame(month = wards$month %>% unique() %>% sort()) %>%
    split(.$month) %>%
    # Make a grid of shapes from the polygon
    map(~get_suicide_rates(.$month)) %>%
    dplyr::bind_rows() %>%
  saveRDS("shapes/interpolate/suicide_rate.rds")
)

```

## Voting Data

```{r}
read_rds("shapes/wards.rds") %>% 
  # Join in voter turnout data
  left_join(by = "ward", y = readxl::read_excel("shapes/voting/fukuoka_election.xlsx") %>%
  filter(party == "turnout") %>%
  pivot_longer(cols = c(`東区`:`西区`), names_to = "ward", values_to = "voter_turnout") %>%
  select(ward, voter_turnout)) %>%
  # Join in electoral outcome data for those who voted for the parties in control of government
  left_join(by = "ward", y = readxl::read_excel("shapes/voting/fukuoka_election.xlsx") %>%
  filter(!is.na(candidate)) %>%
  pivot_longer(cols = c(`東区`:`西区`), names_to = "ward", values_to = "votes") %>%
  # Votes for either the Komeito candidate or for the Jiminto Candidate
  group_by(ward, party = if_else(party %in% c("公明党", "自由民主党"), "party_in_power", "other")) %>%
  summarize(votes = sum(votes, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(ward) %>%
  mutate(total = sum(votes,na.rm = TRUE),
         voteshare = votes / total * 100) %>%
  filter(party == "party_in_power") %>%
  ungroup() %>%
  select(ward, voteshare)) %>%
  saveRDS("shapes/voting.rds")

shapes <- read_rds("shapes/voting.rds")

# Get Voter Turnout
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = voter_turnout ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/voter_turnout.rds")
)


# Get Support for Winning Party
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = voteshare ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/voteshare.rds")
)

```
## Ward Covariates

```{r}
shapes <- read_rds("shapes/wards.rds") %>% 
  # Join in voter turnout data
  left_join(by = "ward", y = readxl::read_excel("shapes/ward_covariates/fukuoka_stats.xlsx")) %>%
  mutate_at(vars(hospitals_2017:physicians_2016, crimes_2019),
            funs(. / pop * 1000))

# Import fishnet grid
fish <- read_rds("shapes/fishnet.rds")

# Get Rate of Hospitals per 1000 residents
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = hospitals_2017 ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/hospitals.rds")
)


# Get Rate of Doctors per 1000 residents
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = physicians_2016 ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/physicians.rds")
)



# Get Clinic Rate per 1000 residents
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = clinics_2017 ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/clinics.rds")
)


# Get Unemployment Rate per 1000 residents
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = unemployment_2015 ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/unemployment.rds")
)

# Get Crime Rate per 1000 residents
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = crimes_2019 ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/crime_rate.rds")
)
```


## Demographics

```{r}
dir.create("shapes/interpolate")
# Import fishnet grid
fish <- read_rds("shapes/fishnet.rds")

# Get population density
# Takes about ~20 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = pop_density ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/pop_density.rds")
)

# Get household density
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = household_density ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/household_density.rds")
)


# Get % women
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = pop_women ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/pop_women.rds")
)


# Get % age 65 plus
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = pop_65_plus ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/pop_65_plus.rds")
)



# Get Employment Rate
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = employment_rate ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/employment_rate.rds")
)



# Get % Employees working in High-Paying Service Sector Jobs
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = high_paying ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/high_paying.rds")
)



# Get % Employees working in Tertiary Sector
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = tertiary_sector ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/tertiary_sector.rds")
)


# Get % Employees working in Secondary Sector
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = secondary_sector ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/secondary_sector.rds")
)


# Get % Employees working in Primary Sector
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = primary_sector ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/primary_sector.rds")
)

remove(shapes)

# Import land prices from prefectural survey
points <- read_rds("shapes/land_price.rds") %>%
  # Zoom into Fukuoka (~900 properties surveyed)
  filter(str_sub(muni_code, 1,2) == "40") 

```

## Land Prices

```{r}
# Get Real Estate Value in yen / m2
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = survey_price_yenm2 ~ 1, 
               data = points %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/survey_price_yenm2.rds")
)

remove(points)
```

## City Parks

```{r}

parks <- read_rds("shapes/city_parks.rds") %>%
  filter(str_detect(muni, "福岡市"))  %>%
  # We know the area of these parks. 
  # Although we don't know specifically their geometry, we could estimate their space
  # by taking their area in meters squared and back-tracking 
  # to the hypothetical radius of a circle,
  # emanating out from the point at which the park is geolocated.
  # this circle would have the same area as the park
  #A=pi*r^2, therefore radius = sqrt(A) / pi)
  mutate(radius_m = sqrt(area_m2 / pi)) %>%
  mutate(geometry = st_buffer(geometry, dist = radius_m)) %>%
  # Now take the fishnet grid of 500 meter squares and overlay it,
  # splicing up the parks records into each fishnet grid cell that
  # overlaps with a park buffer zone
  st_intersection(fish) %>%
  # Calculate the extent of the overlap
  # in terms of area over overlap in square meters
  mutate(overlap = st_area(geometry) %>% as.numeric())

green <- fish %>%
  left_join(by = "id", y = parks %>% as.data.frame() %>% select(id, overlap)) %>%
  group_by(id) %>%
  summarize(green_space = sum(overlap, na.rm = TRUE),
            geometry = st_union(geometry))


# While technically this literally tallies the amount of overlap in greenspace per block...

shapes <- read_rds("shapes/neighborhoods_data.rds")

ggplot() +
  geom_sf(data = shapes, 
          fill = "black", color = "lightgrey") +
  geom_sf(data = parks, color = "#54DA0D") +
  theme_void() 

ggplot() +
#  geom_sf(data = read_rds("shapes/interpolate/pop_density.rds"),
#          mapping = aes(fill = predicted), color = NA) +
  geom_sf(data = shapes, 
          fill = "black", color = "lightgrey") +
  geom_sf(data = green, mapping = aes(fill = green_space)) +
  theme_void()  +
  viridis::scale_fill_viridis(option = "viridis")

# Let's interpolate the level of green space - in case we missed any parks.

# Get estimate amount of Green Space in square meters
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = green_space ~ 1, 
               data = green %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/green_spacem2.rds")
)


remove(green, parks, shapes)


ggplot() +
  geom_sf(data = read_rds("shapes/interpolate/green_spacem2.rds"), 
          mapping = aes(fill = log(predicted) )) +
  theme_void()  +
  viridis::scale_fill_viridis(option = "viridis")
```


## Public Meeting Spaces

```{r}
# I'd like to break these out into two different types
# 1) community centers, public halls, and machizukuri centers,
# and 2) city hall and branch offices

# Import Fukuoka City neighborhoods
shapes <- read_rds("shapes/neighborhoods.rds") %>%
  st_join(read_rds("shapes/city_hall_and_meeting_spaces.rds") %>%
            filter(str_sub(muni_code, 1,2) == "40")) %>%
  filter(!is.na(type)) %>%
  mutate(type = type %>% dplyr::recode(
    "city hall main branch" = "city hall facilities",
    "branch office" = "city hall facilities",
    "public meeting area" = "community centers",
    "community center" = "community centers",
    "public center (machizukuri)" = "community centers")) %>%
  group_by(code = KEY_CODE, type) %>%
  summarize(rate = n() / unique(pop),
            geometry = unique(geometry))


# Get estimate amount of Community Centers per capita
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 filter(type == "community centers") %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/community_centers.rds")
)



# Get estimate amount of local government facilities per capita
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 filter(type == "city hall facilities") %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/city_hall_facilities.rds")
)

remove(shapes)
```

## Cultural Facilities

```{r}
# Import cultural facilities
sites <- read_rds("shapes/cultural_facilities.rds") %>%
  # Zoom into Fukuoka Prefecture
  filter(str_sub(muni_code, 1,2) == "40") %>%
  # Zoom into Fukuoka City area
  st_join(read_rds("shapes/neighborhoods.rds")) %>%
  filter(!is.na(KEY_CODE)) %>%
  select(type, geometry) %>%
  # Let's break these into three types:
  # sports
  # culture / history
  # libraries
  mutate(type = case_when(
    type %in% c("zoo/botanical garden", "aquarium", 
                "museum ,archive, memorial, science museum", 
                "art gallery") ~ "educational",
    type %in% c("library") ~ "libraries",
    TRUE ~ "sports"))

# Import Fukuoka City neighborhoods
shapes <- read_rds("shapes/neighborhoods.rds") %>%
  # Join in 
  st_join(sites) %>%
  filter(!is.na(type)) %>%

  group_by(code = KEY_CODE, type) %>%
  summarize(rate = n() / unique(pop),
            geometry = unique(geometry)) %>%
  ungroup() %>%
  # some places have 0 population (eg. woods)
  # these aren't really comparable as neighborhoods, 
  # and result in infinity scores; 
  # I will remove these entries from the interpolation process.
  filter(!is.infinite(rate))


# Get estimate amount of libraries per capita
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 filter(type == "libraries") %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/libraries.rds")
)


# Get estimate amount of sports facilities per capita
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 filter(type == "sports") %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/sports.rds")
)


# Get estimate amount of educational sites per capita
# (aquariums, museums, zoos, botanical gardens)
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 filter(type == "educational") %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/educational.rds")
)
```


## Schools

```{r}
# Import cultural facilities
sites <- read_rds("shapes/schools.rds") %>%
  # Zoom into Fukuoka Prefecture
  filter(str_sub(muni_code, 1,2) == "40") %>%
  # Zoom into Fukuoka City area
  st_join(read_rds("shapes/neighborhoods.rds")) %>%
  filter(!is.na(KEY_CODE)) %>%
  select(type = school_type, geometry)

# Import Fukuoka City neighborhoods
shapes <- read_rds("shapes/neighborhoods.rds") %>%
  # Join in 
  st_join(sites) %>%
  filter(!is.na(type)) %>%

  group_by(code = KEY_CODE) %>%
  summarize(rate = n() / unique(pop),
            geometry = unique(geometry)) %>%
  ungroup() %>%
  # some places have 0 population (eg. woods)
  # these aren't really comparable as neighborhoods, 
  # and result in infinity scores; 
  # I will remove these entries from the interpolation process.
  filter(!is.infinite(rate))

# Get estimate amount of schools per capita
# Takes about ~35 seconds
system.time(
  # Make a grid of shapes from the polygon
  gstat::gstat(id = "id", formula = rate ~ 1, 
               data = shapes %>%
                 na.omit() %>%
                 as("Spatial"),
               set = list(idp = 2)) %>%
    predict(fish) %>%
    dplyr::select(predicted = id.pred, geometry) %>%
    # Add in a unique id from the original grid cells
    mutate(id = fish$id) %>%
    saveRDS("shapes/interpolate/schools.rds")
)

remove(fish, shapes, sites)
```



# 3. Mapping


```{r}
dir("shapes/interpolate", full.names = TRUE)
# For each cell, please read it in
read_rds("shapes/fishnet.rds") %>%
  left_join(by = "id", 
            y = data.frame(file = dir("shapes/interpolate", full.names = TRUE)) %>%
              # But don't include the outcome variables; they are time-series
              filter(str_detect(file, "case_rate|suicide", negate = TRUE)) %>%
              split(.$file) %>%
              map_dfr(~ read_rds(.$file) %>%
                        as.data.frame() %>%
                        select(id, predicted), .id = "type") %>%
              mutate(type = str_remove(type, "shapes/interpolate/") %>%
                       str_remove(".rds")) %>%
              pivot_wider(id_cols = id, names_from = type, values_from = predicted)) %>%
  # let's log transform several of these which are counts
  # to get more evenly skewed measures,
  # then rescale them from 0 to 1
  mutate_at(vars(city_hall_facilities, community_centers, 
                 educational, green_spacem2, libraries,
                 schools, sports, libraries),
            funs(log(. * 1000) %>% scales::rescale(to = c(0, 1)))) %>%
  # Calculate a social infrastructure index
  mutate(social_infra = (city_hall_facilities + community_centers +
                 educational + green_spacem2 + libraries +
                 schools + sports + libraries) / 7) %>%
  saveRDS("shapes/fishnet_data.rds")


# libraries, schools, sports, community centers, 
# educational centers, city hall facilities, green space

# Let's visualize their distributions
read_rds("shapes/fishnet_data.rds") %>%
  as.data.frame() %>%
  pivot_longer(cols = -c(id, geometry), names_to = "type", values_to = "value") %>%
  ggplot(mapping = aes(x = value, fill = type)) +
  geom_histogram() +
  facet_wrap(~type, scales = "free_x") +
  guides(fill = FALSE)
```


## Social Infrastructure Mapping

```{r}

fish <- read_rds("shapes/fishnet.rds") %>%
  left_join(by = "id", y = read_rds("shapes/fishnet_data.rds") %>%
              as.data.frame() %>% select(-geometry) %>%
              pivot_longer(cols = -c(id), names_to = "type", values_to = "value")) %>%
  filter(type %in% c("social_infra", "city_hall_facilities", "community_centers", 
                     "educational", "green_spacem2", "libraries",
                     "schools", "sports", "libraries")) %>%
  group_by(type) %>%
  mutate(level = case_when(
    value < 2 ~ "2 or less",
    value >= 2 & value > 1 ~ "-2 to -1",
    value >= 1 & value > 0 ~ "-1 to 0",
    value == 0 ~ "0",
    value <= 1 & value > 0 ~ "0 - 1",
    value <= 2 & value > 1 ~ "1 - 2",
    value > 2 ~ "2 or more") %>% 
      factor(levels = c("2 or less", "-2 to -1", "-1 to 0",
                        "0", "0 to 1", "1 to 2", "2 or more"))) %>%
  ungroup() %>%
  mutate(type = type %>% recode_factor(
    "social_infra" = "Social Infrastructure\nIndex",
    "libraries" = "Libraries\nper 1000 residents", 
    "schools" = "Schools\nper 1000 residents", 
    "educational" = "Museums, Galleries,\nZoos, Aquariums,\n and Gardens\nper 1000 residents",
    "sports" = "Sport Grounds\nper 1000 residents", 
    "community_centers" = "Community Centers\nper 1000 residents",
    "city_hall_facilities" = "Public Facilities\nper 1000 residents",
    "green_spacem2" = "Green Space\n(square meters)"))

#dir.create("viz")
shapes <- read_rds("shapes/neighborhoods.rds")
back <- shapes %>% summarize(geometry = st_union(geometry))
wards <- read_rds("shapes/wards.rds")


ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish %>% 
            group_by(type) %>%
            mutate(value = scale(value)), 
          mapping = aes(fill = value), 
          color = NA, size = 0.15) +
  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.1, alpha = 0.5) +
  geom_sf(data = wards, fill = NA, color = "black", size = 0.5, alpha = 0.5) +
  theme_void(base_size = 14) +
  #scale_fill_viridis(option = "plasma") +
  scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps()) +
  facet_wrap(~type, ncol = 4) +
  labs(fill = "Z-Score") +
  ggsave("viz/social_infra_components_scaled.png", dpi = 500, width = 7, height = 6)



ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish, mapping = aes(fill = value), 
          color = NA, size = 0.15) +
  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.1, alpha = 0.5) +
  geom_sf(data = wards, fill = NA, color = "black", size = 0.5, alpha = 0.5) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "plasma") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps()) +
  facet_wrap(~type, ncol = 4) +
  labs(fill = "Score") +
  ggsave("viz/social_infra_components.png", dpi = 500, width = 7, height = 5.5)


```


```{r}
fish <- read_rds("shapes/fishnet_data.rds")
shapes <- read_rds("shapes/neighborhoods.rds")
back <- shapes %>% summarize(geometry = st_union(geometry))
wards <- read_rds("shapes/wards.rds")

ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish %>%
            filter(type == "social_infra"), 
          mapping = aes(fill = scale(value)), color = NA, size = 0.15) +
  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.25) +
  geom_sf(data = wards, fill = NA, color = "black", size = 1) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "viridis") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps())

rm(list = ls())
```


## Outcome Mapping

```{r}
# Get overall case rates
read_rds("shapes/interpolate/case_rate.rds") %>%
  select(id, month, geometry) %>%
  # Join in a wide data.frame of outcomes
  left_join(by = c("id","month"), y = bind_rows(
    read_rds("shapes/interpolate/case_rate.rds") %>% 
      as.data.frame() %>%
      mutate(type = "case_rate"),
    read_rds("shapes/interpolate/case_rate_women.rds") %>% 
      as.data.frame() %>%
      mutate(type = "case_rate_women"),
    read_rds("shapes/interpolate/suicide_rate.rds") %>% 
      as.data.frame() %>%
      mutate(type = "suicide_rate")) %>%
      select(id, predicted, type, month) %>%
      pivot_wider(id_cols = c(id, month), names_from = type, values_from = predicted))  %>%
  left_join(by = "id", y = read_rds("shapes/fishnet_data.rds") %>%
              as.data.frame() %>% select(-geometry)) %>%
  saveRDS("covid_data.rds")
```

```{r}
# Import covid data monthly
fish <- read_rds("covid_data.rds")
shapes <- read_rds("shapes/neighborhoods.rds")
back <- shapes %>% summarize(geometry = st_union(geometry))
wards <- read_rds("shapes/wards.rds")

#fish %>%
#  as.data.frame() %>%
#  group_by(month) %>%
#  summarize(case_rate = sum(is.na(case_rate)) / n(),
#            suicide_rate = sum(is.na(suicide_rate)) / n())

ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish, 
          mapping = aes(fill = log(case_rate) ), color = NA, size = 0.10) +
#  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.25) +
  geom_sf(data = wards, fill = NA, color = "black", size = 1) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "plasma") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps()) +
  facet_wrap(~month, ncol = 7) +
  labs(fill = "Cases\nper\n100,000\nresidents\n(log)") +
  ggsave("viz/covid_rates.png", dpi = 500, width = 10, height = 4)

```


```{r}
# Next, I'm going to calculate the average COVID outcome for each grid cell,
# since it's less clear over time.


fish <- read_rds("covid_data.rds") %>%
  group_by(id) %>%
  summarize(case_rate = mean(case_rate, na.rm = TRUE),
            case_rate_women = mean(case_rate_women, na.rm = TRUE),
            suicide_rate = mean(suicide_rate, na.rm = TRUE), 
            geometry = unique(geometry))


p1 <- ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish, 
          mapping = aes(fill = log(case_rate) ), color = "white", size = 0.10) +
#  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.25) +
  geom_sf(data = wards, fill = NA, color = "black", size = 1) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "plasma") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  #facet_wrap(~month, ncol = 7) +
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, "cm"),
        legend.position = "bottom") +
  labs(fill = "Case\nRate (log)",
       subtitle = "COVID-19 Case Rate\nper 100,000 residents") 
  #ggsave("viz/mapping_covid.png", dpi = 500, width = 5, height = 5)

p2 <- ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish, 
          mapping = aes(fill = log(case_rate_women) ), color = "white", size = 0.10) +
#  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.25) +
  geom_sf(data = wards, fill = NA, color = "black", size = 1) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "plasma") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  #facet_wrap(~month, ncol = 7) +
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, "cm"),
        legend.position = "bottom") +
  labs(fill = "Case Rate\nfor Women (log)",
       subtitle = "COVID-19 Case Rate for Women\nper 100,000 resident") 
  #ggsave("viz/mapping_covid_women.png", dpi = 500, width = 5, height = 5)


p3 <- ggplot() +
  geom_sf(data = back, fill = "black", color = "darkgrey", size = 5) +
  geom_sf(data = fish, 
          mapping = aes(fill = log(suicide_rate) ), color = "white", size = 0.10) +
#  geom_sf(data = shapes, fill = NA, color = "grey", size = 0.25) +
  geom_sf(data = wards, fill = NA, color = "black", size = 1) +
  theme_void(base_size = 14) +
  scale_fill_viridis(option = "plasma") +
  #scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0, high = "#648FFF") +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  #facet_wrap(~month, ncol = 7) +
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, "cm"),
        legend.position = "bottom") +
  labs(fill = "Suicide\nRate (log)",
       subtitle = "Suicide Rate\nper 100,000 residents") 
  #ggsave("viz/mapping_suicide.png", dpi = 500, width = 5, height = 5)

ggpubr::ggarrange(p1,p2,p3, nrow = 1) +
  ggsave("viz/mapping_outcome.png", dpi = 500, width = 11, height = 5)
```



## Neighborhoods Map

```{r}
# Import ward and neighborhood data
ward <- read_rds("shapes/wards.rds") %>%
  mutate(ward = ward %>% dplyr::recode(
    "東区" = "East",
    "西区" = "West",
    "博多区" = "Hakata",
    "南区" = "South",
    "中央区" = "Central",
    "城南区" = "Jonan",
    "早良区" = "Sawara")) 
poly <- read_rds("shapes/neighborhoods.rds")

ggplot() +
  geom_sf(data = ward, fill = "white", color = "darkgrey", size = 5) +
  geom_sf(data = poly, mapping = aes(fill = pop), 
          color = "white", size = 0.1) +
  geom_sf(data = ward, fill = NA, color = "white", size = 0.5) +
  geom_sf_label(data = ward, color = "black", 
                mapping = aes(label = ward), nudge_y = 10, size = 2.5) +
  #geom_sf(data = pref, fill = NA, color = "black", size = 1) +
  theme_void() +
  viridis::scale_fill_viridis(option = "plasma") +
  labs(fill = "Population") +
  guides(fill = guide_colorsteps(barwidth = 0.5, barheight = 10)) +
  ggsave("viz/neighborhoods.png", dpi = 500, width = 5, height = 4.5)
```
## Grid Map

```{r}
fishnet <- read_rds("shapes/fishnet_data.rds")
ward <- read_rds("shapes/wards.rds")
poly <- read_rds("shapes/neighborhoods.rds")

ggplot() +
  geom_sf(data = ward, fill = "white", color = "darkgrey", size = 5) +
  geom_sf(data = poly, mapping = aes(fill = pop), 
          color = NA, size = 0.1) +
  #geom_sf(data = pref, fill = NA, color = "black", size = 1) +
  geom_sf(data = fishnet, fill = NA, color = "white", size = 0.2) +
  geom_sf(data = ward, fill = NA, color = "black", size = 0.5) +
  theme_void() +
  viridis::scale_fill_viridis(option = "plasma") +
  labs(fill = "Population") +
  guides(fill = guide_colorsteps(barwidth = 0.5, barheight = 10)) +
  ggsave("viz/neighborhoods_grid.png", dpi = 500, width = 5, height = 4.5)
```


## COVID Grid Map

```{r}
fishnet <- read_rds("covid_data.rds") %>%
  group_by(id) %>%
  summarize(case_rate = mean(case_rate, na.rm = TRUE),
            social_infra = unique(social_infra),
            geometry = unique(geometry)) %>%
  ungroup() %>%
  mutate(social_infra = if_else(scale(social_infra) > 0, "Above Mean", "Below Mean"))

ward <- read_rds("shapes/wards.rds") %>%
    mutate(ward = ward %>% dplyr::recode(
    "東区" = "East",
    "西区" = "West",
    "博多区" = "Hakata",
    "南区" = "South",
    "中央区" = "Central",
    "城南区" = "Jonan",
    "早良区" = "Sawara")) 

poly <- read_rds("shapes/neighborhoods.rds")

ggplot() +
  geom_sf(data = ward, fill = "white", color = "darkgrey", size = 5) +
  #geom_sf(data = poly, mapping = aes(fill = pop), 
  #        color = NA, size = 0.1) +
  #geom_sf(data = pref, fill = NA, color = "black", size = 1) +
  geom_sf(data = fishnet, mapping = aes(fill = case_rate), 
          color = NA, size = 0.2) +
  geom_sf(data = fishnet, mapping = aes(color = social_infra), fill = NA, size = 0.2) +
  scale_color_manual(values = c("white", NA_character_)) +
  geom_sf(data = poly, fill = NA, color = "darkgrey", size = 0.1) +
  geom_sf(data = ward, fill = NA, color = "black", size = 0.5) +
  geom_sf_label(data = ward, mapping = aes(label = ward), color = "black", size = 3) +
  theme_void(base_size = 14) +
  theme(plot.caption = element_text(hjust = 0.5)) +
  viridis::scale_fill_viridis(option = "plasma") +
  labs(fill = "COVID-19\nCase Rates\nper\n100,000\nresidents",
       caption = "White grid cells indicate\nSocial Infrastructure Index above mean.") +
  guides(color = FALSE) +
  guides(fill = guide_colorsteps(barwidth = 0.5, barheight = 10)) +
  ggsave("viz/covid_grid.png", dpi = 500, width = 5, height = 4.5)
```

## Fukuoka

```{r}
mybbox <- c(xmin = 2834474, xmax = 3800000, ymin = -218811.5, ymax = 2904158.2)

muni <- read_rds("shapes/muni.rds") %>%
  filter(str_sub(muni_code, 1,2) != "47") %>%
  st_crop(y = mybbox) %>%
  left_join(by = c("muni_code", "pref_code"), y = read_csv("shapes/muni_code.csv"))  %>%
  mutate(fukuoka = if_else(str_detect(muni, "福岡市"), "Fukuoka", NA_character_))

pref <- read_rds("shapes/pref.rds") %>%
  filter(pref_code != "47") %>% 
  st_crop(y = mybbox)


ggplot() +
  geom_sf(data = pref, fill = NA, color = "lightblue", size = 2, alpha = 0.10) +
  geom_sf(data = pref, fill = "white", color = NA) +
  geom_sf(data = muni, mapping = aes(fill = fukuoka), color = "lightgrey", size = 0.01) +
  geom_sf(data = pref, fill = NA, color = "black", size = 0.1) +
  geom_sf_label(data = muni, mapping = aes(label = fukuoka), 
                nudge_x = 100000, nudge_y = 100000) +
  scale_fill_manual(values = c("#DC267F", "white")) +
  theme_void() +
  guides(fill = FALSE) +
  ggsave("viz/japan_map.png", dpi = 500, width = 3, height = 5)
```




# 4. Modeling

```{r}
library(texreg)
library(plm)

fish <- read_rds("covid_data.rds") %>%
  as.data.frame() %>%
  mutate(health_care = (scale(hospitals) + scale(clinics) +
                          scale(physicians)) / 3)  %>%
  mutate(health_care = ntile(health_care, 8)) %>%
  mutate(voter_turnout = ntile(voter_turnout, 10)) %>%
  mutate_at(vars(pop_density, pop_women, pop_65_plus,
                 survey_price_yenm2, employment_rate,
                 tertiary_sector, secondary_sector,
                 crime_rate, voter_turnout, voteshare,
                 health_care, social_infra, libraries, 
                 schools, green_spacem2,
                 educational, city_hall_facilities,
                 community_centers,sports),
            funs(scale(.))) %>%
    group_by(id) %>%
  arrange(id, month) %>%
  mutate(lag_case_rate = lag(case_rate, 1),
         lag_case_rate_women = lag(case_rate_women, 1),
         lag_suicide_rate = lag(suicide_rate, 1))
# Lagged variables avoided, because they lead to untenable increases in colinearity in the fixed effects model.
```

## COVID19 Rates

```{r}
# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       #crime_rate + 
       voter_turnout + voteshare +
       health_care + 
       social_infra + factor(month)) 
#m0 %>% car::vif() %>% .^2
# major colinearity problem between unemployment and voter turnout,
# so I use the rate of working-age individuals employed (sort of as a control for what share of the population is going out and about)

# This leads to high colinearity between crime_Rate and voter_turnout
# I'm going to break voter turnout into chunks

# Pretty cool - 
# 1) bonding is negatively correlated; 
# 2) bridging is negatively correlated, 
# 3) linking is POSITIVELY correlated, and 
# 4) social infrastructure is negatively correlated with case rates.

# Now run it in plm
m1 <- fish %>%
  plm::plm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "within",
       index = c("id", "month"))

m2 <- fish %>%
  plm::plm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "random",
       index = c("id", "month"))

# Evaluate collinearity based on random effects, since that's how we'll interpret it.
m2 %>% car::vif() # that's better
# The null hypothesis of the Hausman test is that the errors are uncorrelated;
# we did not reject the null hypothesis;
# they are uncorrelated; use random effects
phtest(m1,m2)

# Next, let's repeat, but use all the social infrastructre predictors


# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       # Social Infrastructure
       libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports +  
       factor(month)) 
# Check colinearity
m0 %>%
  car::vif() %>%
  .^2

m3 <- fish %>%
  plm::plm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "within",
       index = c("id", "month"))
m4 <- fish %>%
  plm::plm(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "random",
       index = c("id", "month"))

texreg::htmlreg(
  list(m1,m2, m3,m4),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  file = "viz/case_rates.html",
  caption.above = TRUE,
  single.row = TRUE,
  caption = "<b>OLS Panel Models of logged COVID-19 Case Rates per 100,000 residents over 13 months (n = 21476)</b><br><i>among 500 square meter grid cells (n = 1652) from February 2020 through February 2021</i>",
  custom.header = list("Overall Model of Social Infrastructure" = 1:2,
                       "Model of Social Infrastructure Subtypes" = 3:4),
  custom.model.names = c("Fixed Effects", "Random Effects",
                         "Fixed Effects", "Random Effects"),
  custom.coef.map = list(
    # Social Infrastructure
        "social_infra" = "Social Infrastructure Index",
    "libraries" = "Libraries per 1000 residents",
    "schools" = "Schools per 1000 residents",
    "green_spacem2" = "Green Space (square meters)",
    "educational" = "Museums, Galleries, Zoos, Aquariums,<br>& Botanical Gardens per 1,000 residents",
    "community_centers" = "Community Centers per 1,000 residents", 
    "city_hall_facilities" = "Public Buildings per 1,000 residents",
    "sports" = "Sports Grounds per 1,000 residents",
    # Social Capital
        "crime_rate" = "Crime Rate per 1,000 residents",
    "voter_turnout" = "Voter Turnout in 2019 Upper House Election",
    "voteshare" = "% Winning Party Vote, 2019 Upper House Election",
    "health_care" = "Health Care Capacity Index",
    # Demographics
    "pop_density" = "Population Density per 1,000 residents",
    "pop_women" = "(%) Women",
    "pop_65_plus" = "(%) Ages 65+",
    "survey_price_yenm2" = "Land Price (yen/m2)",
    "employment_rate" = "(%) Employed (ages 15-64)",
    "tertiary_sector" = "(%) Tertiary Sector",
    "secondary_sector" = "(%) Secondary Sector",
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Social Infrastructure</b>" = 1:8,
    "<b>Social Capital</b>" = 9:11,
    "<b>Health Care Exposure</b>" = 12,
    "<b>Demographic Vulnerability</b>" = 13:15,
    "<b>Socioeconomic Vulnerability</b>" = 16:19),
  #include.fstatistic = TRUE, # Can't seem to get this to work
  custom.gof.names = c(
    rep(NA, 3),
    "Variance (Grid Cells)",
    "Variance (Months)"),
  custom.note = "*** p < 0.001; ** p < 0.01; * p < 0.05; . p < 0.1. All predictors rescaled; beta coefficients should be read as the predicted increase in the log-monthly case rate given a one standard deviation increase in the predictor. Fixed/random effects included in models but omitted from tables to conserve space. Variance Inflation Factor (VIF) scores ranged from 1.4 to 7.9, with a mean of 3.3, far below the threshold for problematic multicolinearity (VIF = 10). To reduce colinearity, unemployment rates were replaced by employment rates among working-age residents, the health care index was broken into eight quantiles, and voter turnout was broken into ten quantiles.")

#(car::vif(m0)^2)[,3] %>% mean()
```


## Women's COVID19 Rates

```{r}
# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra + factor(month)) 
# Check colinearity
m0 %>%
  car::vif() %>%
  .^2

# major colinearity problem between unemployment and voter turnout,
# so I use the rate of working-age individuals employed (sort of as a control for what share of the population is going out and about)

# This leads to high colinearity between crime_Rate and voter_turnout
# I'm going to break voter turnout into chunks

# Pretty cool - 
# 1) bonding is negatively correlated; 
# 2) bridging is negatively correlated, 
# 3) linking is POSITIVELY correlated, and 
# 4) social infrastructure is negatively correlated with case rates.

# Now run it in plm
m1 <- fish %>%
  plm::plm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "within",
       index = c("id", "month"))

m2 <- fish %>%
  plm::plm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "random",
       index = c("id", "month"))


# The null hypothesis of the Hausman test is that the errors are uncorrelated;
# we did not reject the null hypothesis;
# they are uncorrelated; use random effects
phtest(m1,m2)

# Next, let's repeat, but use all the social infrastructre predictors


# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       # Social Infrastructure
       libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports + 
       factor(month)) 
# Check colinearity
m0 %>%
  car::vif() %>%
  .^2

m3 <- fish %>%
  plm::plm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "within",
       index = c("id", "month"))
m4 <- fish %>%
  plm::plm(formula = log(case_rate_women) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "random",
       index = c("id", "month"))


texreg::htmlreg(
  list(m1,m2, m3,m4),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  file = "viz/case_rates_women.html",
  caption.above = TRUE, 
  single.row = TRUE,
  caption = "<b>OLS Panel Models of logged COVID-19 Case Rates among Women per 100,000 residents over 13 months (n = 21476)</b><br><i>among 500 square meter grid cells (n = 1652) from February 2020 through February 2021</i>",
  custom.header = list("Overall Model of Social Infrastructure" = 1:2,
                       "Model of Social Infrastructure Subtypes" = 3:4),
  custom.model.names = c("Fixed Effects", "Random Effects",
                         "Fixed Effects", "Random Effects"),
   custom.coef.map = list(
    # Social Infrastructure
        "social_infra" = "Social Infrastructure Index",
    "libraries" = "Libraries per 1000 residents",
    "schools" = "Schools per 1000 residents",
    "green_spacem2" = "Green Space (square meters)",
    "educational" = "Museums, Galleries, Zoos, Aquariums,<br>& Botanical Gardens per 1,000 residents",
    "community_centers" = "Community Centers per 1,000 residents", 
    "city_hall_facilities" = "Public Buildings per 1,000 residents",
    "sports" = "Sports Grounds per 1,000 residents",
    # Social Capital
        "crime_rate" = "Crime Rate per 1,000 residents",
    "voter_turnout" = "Voter Turnout in 2019 Upper House Election",
    "voteshare" = "% Winning Party Vote, 2019 Upper House Election",
    "health_care" = "Health Care Capacity Index",
    # Demographics
    "pop_density" = "Population Density per 1,000 residents",
    "pop_women" = "(%) Women",
    "pop_65_plus" = "(%) Ages 65+",
    "survey_price_yenm2" = "Land Price (yen/m2)",
    "employment_rate" = "(%) Employed (ages 15-64)",
    "tertiary_sector" = "(%) Tertiary Sector",
    "secondary_sector" = "(%) Secondary Sector",
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Social Infrastructure</b>" = 1:8,
    "<b>Social Capital</b>" = 9:11,
    "<b>Health Care Exposure</b>" = 12,
    "<b>Demographic Vulnerability</b>" = 13:15,
    "<b>Socioeconomic Vulnerability</b>" = 16:19),
  #include.fstatistic = TRUE, # Can't seem to get this to work
  custom.gof.names = c(
    rep(NA, 3),
    "Variance (Grid Cells)",
    "Variance (Months)"),
  custom.note = "*** p < 0.001; ** p < 0.01; * p < 0.05; . p < 0.1. All predictors rescaled; beta coefficients should be read as the predicted increase in the log-monthly case rate given a one standard deviation increase in the predictor. Fixed/random effects included in models but omitted from tables to conserve space. Variance Inflation Factor (VIF) scores ranged from 1.4 to 7.9, with a mean of 3.3, far below the threshold for problematic multicolinearity (VIF = 10). To reduce colinearity, unemployment rates were replaced by employment rates among working-age residents, the health care index was broken into eight quantiles, and voter turnout was broken into ten quantiles.")

#(car::vif(m0)^2)[,3] %>% mean()
```

## Suicide Rates

```{r}
# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra + factor(month)) 
# Check colinearity
m0 %>%
  car::vif() %>%
  .^2

# major colinearity problem between unemployment and voter turnout,
# so I use the rate of working-age individuals employed (sort of as a control for what share of the population is going out and about)

# This leads to high colinearity between crime_Rate and voter_turnout
# I'm going to break voter turnout into chunks

# Pretty cool - 
# 1) bonding is negatively correlated; 
# 2) bridging is negatively correlated, 
# 3) linking is POSITIVELY correlated, and 
# 4) social infrastructure is negatively correlated with case rates.

# Now run it in plm
m1 <- fish %>%
  plm::plm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "within",
       index = c("id", "month"))

m2 <- fish %>%
  plm::plm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra, effect = "time", model = "random",
       index = c("id", "month"))


# The null hypothesis of the Hausman test is that the errors are uncorrelated;
# we did not reject the null hypothesis;
# they are uncorrelated; use random effects
phtest(m1,m2)

# Next, let's repeat, but use all the social infrastructre predictors


# Run a standard linear model
m0 <- fish %>% 
  lm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       # Social Infrastructure
       libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports + 
       factor(month)) 
# Check colinearity
m0 %>%
  car::vif() %>%
  .^2

m3 <- fish %>%
  plm::plm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "within",
       index = c("id", "month"))
m4 <- fish %>%
  plm::plm(formula = log(suicide_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
        libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports, effect = "time", model = "random",
       index = c("id", "month"))


texreg::htmlreg(
  list(m1,m2, m3,m4),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  file = "viz/suicide_rates.html",
  caption.above = TRUE, single.row = TRUE,
  caption = "<b>OLS Panel Models of logged Suicide Rates per 100,000 residents during COVID-19 over 12 months (n = 19824)</b><br><i>among 500 square meter grid cells (n = 1652) from February 2020 through February 2021</i>",
  custom.header = list("Overall Model of Social Infrastructure" = 1:2,
                       "Model of Social Infrastructure Subtypes" = 3:4),
  custom.model.names = c("Fixed Effects", "Random Effects",
                         "Fixed Effects", "Random Effects"),
  custom.coef.map = list(
    # Social Infrastructure
        "social_infra" = "Social Infrastructure Index",
    "libraries" = "Libraries per 1000 residents",
    "schools" = "Schools per 1000 residents",
    "green_spacem2" = "Green Space (square meters)",
    "educational" = "Museums, Galleries, Zoos, Aquariums,<br>& Botanical Gardens per 1,000 residents",
    "community_centers" = "Community Centers per 1,000 residents", 
    "city_hall_facilities" = "Public Buildings per 1,000 residents",
    "sports" = "Sports Grounds per 1,000 residents",
    # Social Capital
        "crime_rate" = "Crime Rate per 1,000 residents",
    "voter_turnout" = "Voter Turnout in 2019 Upper House Election",
    "voteshare" = "% Winning Party Vote, 2019 Upper House Election",
    "health_care" = "Health Care Capacity Index",
    # Demographics
    "pop_density" = "Population Density per 1,000 residents",
    "pop_women" = "(%) Women",
    "pop_65_plus" = "(%) Ages 65+",
    "survey_price_yenm2" = "Land Price (yen/m2)",
    "employment_rate" = "(%) Employed (ages 15-64)",
    "tertiary_sector" = "(%) Tertiary Sector",
    "secondary_sector" = "(%) Secondary Sector",
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Social Infrastructure</b>" = 1:8,
    "<b>Social Capital</b>" = 9:11,
    "<b>Health Care Exposure</b>" = 12,
    "<b>Demographic Vulnerability</b>" = 13:15,
    "<b>Socioeconomic Vulnerability</b>" = 16:19),
  #include.fstatistic = TRUE, # Can't seem to get this to work
  custom.gof.names = c(
    rep(NA, 3),
    "Variance (Grid Cells)",
    "Variance (Months)"),
  custom.note = "*** p < 0.001; ** p < 0.01; * p < 0.05; . p < 0.1. All predictors rescaled; beta coefficients should be read as the predicted increase in the log-monthly case rate given a one standard deviation increase in the predictor. Fixed/random effects included in models but omitted from tables to conserve space. Variance Inflation Factor (VIF) scores ranged from 1.4 to 7.9, with a mean of 3.3, far below the threshold for problematic multicolinearity (VIF = 10). To reduce colinearity, unemployment rates were replaced by employment rates among working-age residents, the health care index was broken into eight quantiles, and voter turnout was broken into ten quantiles.")

#(car::vif(m0)^2)[,3] %>% mean()
```

# 5. Simulation

## Social Infrastructure

```{r}
# Fortunately, the random and fixed effects models deliver virtually idenical results, even if the Hausman tests indicates a slight preference for the random effects

# This means we can easily make simulations in the Zelig package,
# which works well with fixed effects

library(Zelig)
library(tidyverse)
fish <- read_rds("covid_data.rds") %>%
  as.data.frame() %>%
    mutate(health_care = (scale(hospitals) + scale(clinics) +
                          scale(physicians)) / 3)  %>%
  mutate(health_care = ntile(health_care, 8)) %>%
  mutate(voter_turnout = ntile(voter_turnout, 10)) %>%
  mutate_at(vars(pop_density, pop_women, pop_65_plus,
                 survey_price_yenm2, employment_rate,
                 tertiary_sector, secondary_sector,
                 crime_rate, voter_turnout, voteshare,
                 health_care, social_infra, libraries, 
                 schools, green_spacem2,
                 educational, city_hall_facilities,
                 community_centers,sports),
            funs(scale(.)))

z1 <- fish %>% 
  zelig(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       social_infra + factor(month), model = "ls") 

# Run a standard linear model
z2 <- fish %>% 
  zelig(formula = log(case_rate) ~ 
       # Demographics
       pop_density + pop_women + pop_65_plus +
       # Socioeconomics
       survey_price_yenm2 + employment_rate +
       tertiary_sector + secondary_sector + 
       # Economic Activity
       crime_rate + voter_turnout + voteshare +
       health_care + 
       # Social Infrastructure
       libraries + 
       schools + 
       green_spacem2 +
       educational + 
       city_hall_facilities +
       community_centers + 
       sports + 
       factor(month), model = "ls") 

get_stats = function(simulation){
  data.frame(
    x = simulation$x$ev %>% unlist(),
    x1 = simulation$x1$ev %>% unlist()) %>%
    # Exponentiate the expected values and 
    # calculate the difference (because this can be different from exponentiating the log-fd)
    mutate(
      x = exp(x),
      x1 = exp(x1),
      fd = x1 - x) %>%
    return()
}

out <- bind_rows(
  
  z1 %>%
    sim(x = setx(., social_infra = -2),
        x1 = setx(., social_infra = -1)) %>%
    with(sim.out) %>%
    get_stats() %>%
    mutate(type = "-2 to -1"),
  z1 %>%
    sim(x = setx(., social_infra = -1),
        x1 = setx(., social_infra = 0)) %>%
    with(sim.out) %>%
    get_stats() %>%
    mutate(type = "-1 to 0"),
  z1 %>%
    sim(x = setx(., social_infra = 0),
        x1 = setx(., social_infra = 1)) %>%
    with(sim.out) %>%
    get_stats() %>%
    mutate(type = "0 to 1"),
  z1 %>%
    sim(x = setx(., social_infra = 1),
        x1 = setx(., social_infra = 2)) %>%
    with(sim.out) %>%
    get_stats() %>%
    mutate(type = "1 to 2")) %>%
  mutate(type = factor(type, levels = c(
    "-2 to -1", "-1 to 0", "0 to 1", "1 to 2")))  %>%
  group_by(type) %>%
  filter(fd > quantile(fd, probs = 0.025),
         fd < quantile(fd, probs = 0.975)) %>%
  ungroup() %>%
  mutate(id = 1:n()) %>%
  pivot_longer(cols = c(x, x1), names_to = "level", values_to = "value") %>%
  group_by(id,type) %>%
  mutate(lower = min(value),
         upper = max(value)) %>%
  ungroup()

out %>% 
  ggplot(mapping = aes(x = reorder(id, upper), y = value, ymin = lower, ymax = upper, color = type)) +
  geom_linerange(alpha = 0.95, size = 0.05) +
  geom_point(data = . %>% filter(value == upper), color = "black", size = 0.3) +
  geom_point(data = . %>% filter(value == upper), color = "white", size = 0.01) +
  facet_wrap(~type, scales = "free_x", ncol = 4) +
  guides(color = FALSE) +
  theme_classic(base_size = 14) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.border = element_rect(fill = NA, color = "black"),
      #  panel.background = element_rect(fill = "black"),
        plot.subtitle = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0.5)) +
  labs(x = "1000 Simulated First Differences\nas Social Infrastructure Index Increases",
       y = "Expected COVID-19 Cases\nper 100,000 residents",
       subtitle = "Change in Social Infrastructure (Z-score)",
       caption = "Based on 1000 simulations using the Zelig package,\nholding all other variables at their means or modes.") +
  scale_color_manual(values = c("#648FFF", "#785EF0", "#DC267F", "#FE6100")) +
  ggsave("viz/zelig_social_infrastructure.png", dpi = 500, width = 5.5, height = 4)

```
## Subtypes

```{r}

out <- bind_rows(
  z1 %>%
    setx(social_infra = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Social Infrastructure") %>%
    select(expected_value, x = social_infra, type),
  z2 %>%
    setx(libraries = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Libraries") %>%
    select(expected_value, x = libraries, type),
  z2 %>%
    setx(green_spacem2 = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Green Space") %>%
    select(expected_value, x = green_spacem2, type),
  z2 %>%
    setx(educational = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Museums, Galleries,\nZoos, Aquariums,\n& Botanical Gardens") %>%
    select(expected_value, x = educational, type),
  z2 %>%
    setx(schools = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Schools") %>%
    select(expected_value, x = schools, type),
  z2 %>%
    setx(community_centers = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Community Centers") %>%
    select(expected_value, x = community_centers, type),
  z2 %>%
    setx(city_hall_facilities = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Public Buildings") %>%
    select(expected_value, x = city_hall_facilities, type),
  z2 %>%
    setx(sports = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Sports Grounds") %>%
    select(expected_value, x = sports, type))

out %>%
  mutate(type = factor(type, levels = c(
    "Social Infrastructure", "Libraries", "Green Space", 
    "Museums, Galleries,\nZoos, Aquariums,\n& Botanical Gardens",
    "Sports Grounds", "Schools",
    "Community Centers", "Public Buildings"))) %>%
  mutate(group = case_when(
    type %in% c("Social Infrastructure", "Libraries", "Green Space", 
                "Museums, Galleries,\nZoos, Aquariums,\n& Botanical Gardens") ~ "Beneficial Effects\non Public Health",
    TRUE ~ "Detrimental Effects\non Public Health") %>% factor(levels = c("Beneficial Effects\non Public Health", "Detrimental Effects\non Public Health"))) %>%
  group_by(type, group, x) %>%
  summarize(median = median(exp(expected_value)),
            lower = quantile(exp(expected_value), probs = 0.025),
            upper = quantile(exp(expected_value), probs = 0.975)) %>%
#   ggplot(mapping = aes(x = x, y = exp(expected_value), 
#                       color = type)) +
#  geom_jitter(alpha = 0.10, width = 0.5) +
  ggplot(mapping = aes(x = x, y = median, ymin = lower, ymax = upper, fill = type)) +
 # geom_line() +
  geom_ribbon(color = "white", alpha = 0.5) +
  facet_wrap(~group, nrow = 1) +
  theme_classic(base_size = 14) + 
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm"),
        plot.caption = element_text(hjust = 0.5)) +
  labs(x = "Social Infrastructure (Z-Score)",
       y = "Expected COVID-19 Case Rate\nper 100,000 residents",
       fill = "Type",
         caption = "Based on 1000 simulations using the Zelig package,\nholding all other variables at their means or modes.") +
   scale_fill_manual(values = c("#6CED4B", "#3CA96C", "#648FFF", "#785EF0", "#B02986", 
                                "#DC267F", "#FE6100","#FFB000")) +
  ggsave("viz/zelig_subtypes.png", dpi = 500, width = 8, height = 5)
 
```

## Social Capital

```{r}

out <- bind_rows(
  z1 %>%
    setx(crime_rate = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Bonding\nSocial Capital\n(Inverse Crime Rate)\n") %>%
    select(expected_value, x = crime_rate, type),
  z2 %>%
    setx(voter_turnout = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Bridging\nSocial Capital\n(Voter Turnout)\n") %>%
    select(expected_value, x = voter_turnout, type),
  z2 %>%
    setx(voteshare = seq(from = -2, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Linking\nSocial Capital\n(Voteshare for\nWinning Party)") %>%
    select(expected_value, x = voteshare, type)) %>%
  mutate(type = factor(type, levels = c(
    "Bonding\nSocial Capital\n(Inverse Crime Rate)\n",
    "Bridging\nSocial Capital\n(Voter Turnout)\n",
    "Linking\nSocial Capital\n(Voteshare for\nWinning Party)"))) 

outrange <- out %>%
  group_by(type, x) %>%
  summarize(median = median(exp(expected_value)),
            lower = quantile(exp(expected_value), probs = 0.025),
            upper = quantile(exp(expected_value), probs = 0.975)) %>%
  ungroup() %>%
  mutate(x = if_else(type == "Bonding\nSocial Capital\n(Inverse Crime Rate)\n", true = x*-1, false = x)) 


ggplot() +
#  geom_jitter(data = out, mapping = aes(x = x, y = exp(expected_value), 
#                                             color = type), alpha = 0.25) +
  geom_ribbon(data = outrange, mapping = aes(x = x, y = median, ymin = lower, ymax = upper, fill = type),
              color = "white", alpha = 0.5) +
 #facet_wrap(~type, nrow = 1) +
  theme_classic(base_size = 14) + 
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm"),
        plot.caption = element_text(hjust = 0.5)) +
  labs(x = "Social Capital (Z-Score)",
       y = "Expected COVID-19 Case Rate\nper 100,000 residents",
       fill = "Type",
         caption = "Based on 1000 simulations using the Zelig package,\nholding all other variables at their means or modes.") +
   scale_fill_manual(values = c("#648FFF","#DC267F", "#FFB000")) +
  guides(color = FALSE) +
  ggsave("viz/zelig_social_capital.png", dpi = 500, width = 8, height = 5)
 
```





